library(tidyverse)
library(haven)

source("functions_code_variables.R")

# Folder holding the replication data files; edit this to match where the
# data is stored locally. The trailing slash is required because file names
# are appended with paste0().
data_path <- "../../data/replication/"

# Respondent id plus the wave 21 age, NS-SEC class, ethnicity, gender and
# education level columns. Only these columns are read from the file.
with_dems <- read_dta(
  file = paste0(data_path, "with_dems.dta"),
  col_select = c(
    id,
    ageW21,
    ns_sec_analyticW21,
    p_ethnicityW21,
    gender,
    p_edlevelW21
  )
)

# Main panel file. Columns whose names end in W1 ... W18 are skipped at read
# time, the rows are restricted to wave21 == 1, and age, ethnicity and gender
# are dropped so that the versions in with_dems replace them after the join.
bes <- read_dta(
  file = paste0(data_path, "main_bes.dta"),
  col_select = -matches(paste0("W", 1:18, "$"))
) %>%
  filter(wave21 == 1) %>%
  select(-c(ageW21, p_ethnicityW21, gender)) %>%
  left_join(with_dems, by = "id")

# Derive every analysis variable from the wave 21 respondents. The recoding
# helpers used below (experiment_handle(), governments_fault(), ...) are not
# defined in this script -- presumably they come from
# functions_code_variables.R; they are called as
# helper(data, source column, new column).
bes_with_variables <- bes %>%
  # Experiment variables
  mutate(
    # 0 when all five experiment outcome items are missing, 1 otherwise
    experiment = if_else(
      is.na(deathTollGovFaultDVW21) &
        is.na(vaccineGovFaultDVW21) &
        is.na(covidallDVW21) &
        is.na(handleCoronavirusRetroW21) &
        is.na(strainHandleW21),
      0,
      1
    ),
    # treatment arm as a factor with "control" as the first level; any other
    # value of bmarktreatW21 (including missing) becomes NA
    treatment_w21 = factor(
      case_when(
        bmarktreatW21 == 1 ~ "control",
        bmarktreatW21 == 2 ~ "positive",
        bmarktreatW21 == 3 ~ "negative"
      ),
      levels = c("control", "positive", "negative")
    )
  ) %>%
  experiment_handle(ukCOVIDPerformW21, uk_covid_performance_w21) %>%
  experiment_handle_as_numbers(
    ukCOVIDPerformW21,
    uk_covid_performance_w21_num
  ) %>%
  governments_fault(deathTollGovFaultDVW21, death_toll_gov_fault_w21) %>%
  governments_fault(vaccineGovFaultDVW21, vaccine_gov_fault_w21) %>%
  governments_fault(covidallDVW21, covid_overall_gov_fault_w21) %>%
  experiment_handle(handleCoronavirusRetroW21, retro_handle_w21) %>%
  experiment_handle_as_numbers(
    handleCoronavirusRetroW21,
    retro_handle_w21_num
  ) %>%
  experiment_handle(strainHandleW21, new_strain_handle_w21) %>%
  experiment_handle_as_numbers(strainHandleW21, new_strain_handle_w21_num) %>%

  # Flag (1) those who don't think the vaccine is safe and / or are against
  # vaccines in general; everyone else, including missing answers, gets 0
  mutate(
    rm_for_vaccine_attitude = case_when(
      hesitantReason_2W21 == 1 | hesitantReason_4W21 == 1 ~ 1,
      TRUE ~ 0
    )
  ) %>%

  # demographics
  age_four_categories(ageW20, age_w20) %>%
  age_four_categories(ageW21, age_w21) %>%
  class_seven_categories(ns_sec_analyticW20, class_w20) %>%
  class_seven_categories(ns_sec_analyticW21, class_w21) %>%
  white_dummy(p_ethnicityW20, white_british_w20) %>%
  white_dummy(p_ethnicityW21, white_british_w21) %>%
  # both male dummies are built from the same (un-waved) gender column
  male_dummy(gender, male_w20) %>%
  male_dummy(gender, male_w21) %>%
  education_five_categories(p_edlevelW20, education_w20) %>%
  education_five_categories(p_edlevelW21, education_w21) %>%

  # partisanship
  party_id_opposition_gov(partyIdW21, party_id_gov_op_w21) %>%
  party_id_opposition_gov(partyIdW20, party_id_gov_op_w20) %>%
  party_id_opposition_gov(partyIdW19, party_id_gov_op_w19) %>%

  # Flag (1) brexiters and other, plus anyone who moved between the
  # Conservatives and the opposition parties from wave 20 to wave 21
  mutate(
    rm_for_party = case_when(
      # brexiters and other
      partyIdW21 %in% c(9, 12) ~ 1,
      # switched to Cons from opposition
      partyIdW21 == 1 & partyIdW20 %in% c(2:5, 7, 11) ~ 1,
      # switched to opposition from Cons
      partyIdW21 %in% c(2:5, 7, 11) & partyIdW20 == 1 ~ 1,
      TRUE ~ 0
    )
  ) %>%

  # handling variables
  handle_corona_to_factor(handleCoronaW20, handle_corona_w20_fct) %>%
  mutate(
    # 9999 is recoded to NA (presumably the "don't know" code -- confirm)
    handle_corona_w20_num = na_if(handleCoronaW20, 9999),
    # wave 21 retrospective rating minus the wave 20 rating
    retro_handle_change_w21 = retro_handle_w21_num - handle_corona_w20_num
  ) %>%

  # handling in wave 21, but not experiment
  handle_corona_to_factor(handleCoronaW21, handle_corona_w21_fct) %>%
  mutate(
    handle_corona_w21_num = na_if(handleCoronaW21, 9999),
    pol_attention_w21 = na_if(polAttentionW21, 9999)
  )

# adding extra weighting variable to experiment data
# Source of the target distribution (presumably the face-to-face survey --
# confirm against the data documentation).
ftf <- read_sav(file = paste0(data_path, "bes_ftf.sav"))

# Target distribution: unweighted percentage of respondents at each value
# 0-10 of k01, which is renamed `attention` so it can be matched against the
# experiment sample. Rows with any other k01 value are excluded first.
targets <- ftf %>%
  filter(k01 %in% 0:10) %>%
  count(k01, name = "local_tot") %>%
  mutate(target_perc = 100 * (local_tot / sum(local_tot))) %>%
  select(attention = k01, target_perc)

# getting right distribution in experiment specifically
# Weighted (wt_new_W21) percentage of experiment participants -- rows with a
# non-missing treatment arm -- at each value 0-10 of polAttentionW21.
actual <- bes_with_variables %>%
  filter(!is.na(treatment_w21), polAttentionW21 %in% 0:10) %>%
  # overall weighted total, carried through the grouping as a constant
  mutate(total = sum(wt_new_W21)) %>%
  group_by(polAttentionW21, total) %>%
  summarise(local_tot = sum(wt_new_W21), .groups = "drop") %>%
  mutate(actual_perc = 100 * (local_tot / total)) %>%
  select(attention = polAttentionW21, actual_perc)
# Adjustment factor for each attention level: target share divided by the
# weighted share observed in the experiment sample. The join key is named
# explicitly so the match cannot silently change if the two tables ever share
# another column name. The key is renamed back to polAttentionW21 so the
# factors can be merged onto the respondent-level data.
weights <- targets %>%
  left_join(actual, by = "attention") %>%
  mutate(pol_att_weight = target_perc / actual_perc) %>%
  select(polAttentionW21 = attention, pol_att_weight)
# Experiment sample (non-missing treatment arm) with the combined weight
# wt_with_pol_att_w21 = attention adjustment factor * wt_new_W21.
# The join key is named explicitly: bes_with_variables is very wide, so an
# implicit natural join would depend on which column names happen to overlap.
# NOTE(review): respondents whose polAttentionW21 has no row in `weights`
# (anything outside 0-10, or missing) get NA for wt_with_pol_att_w21 --
# confirm this is intended.
ex <- bes_with_variables %>%
  filter(!is.na(treatment_w21)) %>%
  left_join(weights, by = "polAttentionW21") %>%
  mutate(wt_with_pol_att_w21 = pol_att_weight * wt_new_W21) %>%
  select(-pol_att_weight)

# checking it worked
# Prints the attention distribution of the experiment sample under the new
# weight next to the target percentages; the `weighted` and `target_perc`
# columns should agree. The join key is named explicitly rather than left to
# an implicit natural join.
ex %>%
  filter(polAttentionW21 %in% 0:10) %>%
  # overall weighted total, carried through the grouping as a constant
  mutate(total = sum(wt_with_pol_att_w21)) %>%
  group_by(polAttentionW21, total) %>%
  summarise(local_tot = sum(wt_with_pol_att_w21), .groups = "drop") %>%
  mutate(weighted = 100 * (local_tot / total)) %>%
  select(attention = polAttentionW21, weighted) %>%
  left_join(targets, by = "attention")

# Write the tidied experiment sample next to the input data.
saveRDS(
  object = ex,
  file = paste0(data_path, "experiment_data_tidied.rds")
)


